######################################
###                                ###
###   Women and Party Building     ###
###                                ###
###  code_wrangling_candidates.R   ###
###                                ###
######################################

# This script cleans the candidate dataset and matches the party initials to the versions in the party membership dataset

# Start from an empty workspace so that objects left over from other scripts
# cannot leak into this one. NOTE: this removes every object in the environment
# in which the script is run.
rm(list = ls())
library(dplyr)

cat("Cleaning the candidates dataset...\n")


# Point the working directory at the folder where this script is saved, so that
# the relative data paths below resolve. The lookup goes through the rstudioapi
# package and is expected to fail outside RStudio (e.g. under Rscript) or when
# the active document has not been saved. This stays a best-effort step: on
# failure the script continues from the current working directory, but it now
# reports why and where it is running instead of printing a bare error.
# Alternatively, you can use setwd( PATH ) to set this directory manually.
tryCatch(
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path)),
  error = function(e) {
    message(
      "Could not set the working directory from the RStudio document context (",
      conditionMessage(e), ").\n",
      "Continuing in: ", getwd(), "\n",
      "Use setwd( PATH ) to point R at this script's folder if the data files are not found."
    )
  }
)

# Raw candidate records; the path is relative to this script's folder.
candidates <- readRDS("../1_data/raw/data_raw_Candidates.rds")

# During the years covered in this article's analysis, many Brazilian parties changed their names, merged into other parties,
# or split apart. This section matches the party names and initials in the raw candidates dataset to the party initials recorded in the
# party membership files (which are based on what the parties were calling themselves in 2020, when the party membership data was downloaded).

# These codings are based on the inheritance of the party membership registry:
#     - If the party merely changed its name, the membership registry remained the same, and so the Party variable is anachronistically coded to the new name 
#     - If the party was founded as the merger of two or more registered parties, it inherited the membership registries of all of the parties that formed it, and so the Party variable is anachronistically coded to the name of the resulting party
#     - If the party split apart into two or more separate parties, the Party variable is coded to the name of the offshoot party that kept the original party's membership registry, according to the TSE (which is usually the same as the offshoot party that kept the original party's name)

# Build the harmonised `Party` variable from the raw SIGLA_PARTIDO field.
# Each argument to recode() reads "raw value" = "harmonised initials". The raw
# field holds a mix of initials and fully spelled-out party names, so both forms
# are listed. Values of SIGLA_PARTIDO that have no entry below are carried over
# to `Party` unchanged.
candidates <- candidates %>%
  mutate(
    Party = dplyr::recode(
      SIGLA_PARTIDO,
      # --- Raw values recorded (mostly) as initials ---
      "PC do B" = "PCDOB",
      "PC Do B" = "PCDOB",
      "PT DO B" = "AVANTE",
      "PFL" = "DEM",
      "DEMOCRATAS" = "DEM",
      "PMDB" = "MDB",
      "PR" = "PL",
      "PST" = "PL",
      "PRONA" = "PL",
      "PPB" = "PP",
      "PPS" = "CIDADANIA",
      "PRB" = "REPUBLICANOS",
      "PRM" = "REPUBLICANOS",
      "PT do B" = "AVANTE",
      "PARTIDO TRABALHISTA DO BRASIL" = "AVANTE",
      "PSDC" = "DC",
      "PEN" = "PATRIOTA",
      "PSN" = "PHS",
      "PTN" = "PODE",
      "PODEMOS" = "PODE",
      "PRN" = "PTC",
      "SDD" = "SOLIDARIEDADE",
      "SD" = "SOLIDARIEDADE",
      # --- Raw values recorded as full party names ---
      "PARTIDO DOS TRABALHADORES" = "PT",
      "PARTIDO DEMOCRÁTICO TRABALHISTA" = "PDT",
      "PARTIDO DO MOVIMENTO DEMOCRÁTICO BRASILEIRO" = "MDB",
      "PARTIDO DA SOCIAL DEMOCRACIA BRASILEIRA" = "PSDB",
      "PARTIDO DA REPÚBLICA" = "PL",
      "PARTIDO SOCIALISTA BRASILEIRO" = "PSB",
      "PARTIDO SOCIAL CRISTÃO" = "PSC",
      "PARTIDO VERDE" = "PV",
      "PARTIDO DOS APOSENTADOS DA NAÇÃO" = "PAN",
      "PARTIDO COMUNISTA BRASILEIRO" = "PCB",
      "PARTIDO COMUNISTA DO BRASIL" = "PCDOB",
      "PARTIDO DA MOBILIZAÇÃO NACIONAL" = "PMN",
      "PARTIDO LIBERAL" = "PL",
      "PARTIDO POPULAR SOCIALISTA" = "CIDADANIA",
      "PARTIDO HUMANISTA DA SOLIDARIEDADE" = "PHS",
      "PARTIDO PROGRESSISTA" = "PP",
      "PARTIDO PROGRESSISTA BRASILEIRO" = "PP",
      # Initials are P-R-T-B. This entry previously produced "PTRB", which
      # differs from the code that rows already recorded as "PRTB" keep.
      "PARTIDO RENOVADOR TRABALHISTA BRASILEIRO" = "PRTB",
      "PARTIDO REPUBLICANO BRASILEIRO" = "REPUBLICANOS",
      "PARTIDO REPUBLICANO PROGRESSISTA" = "PRP",
      "PARTIDO SOCIAL DEMOCRATA CRISTÃO" = "DC",
      "PARTIDO SOCIAL LIBERAL" = "PSL",
      "PARTIDO SOCIALISMO E LIBERDADE" = "PSOL",
      "PARTIDO SOCIALISTA DOS TRABALHADORES UNIFICADO" = "PSTU",
      "PARTIDO TRABALHISTA BRASILEIRO" = "PTB",
      "PARTIDO TRABALHISTA CRISTÃO" = "PTC",
      "PARTIDO TRABALHISTA NACIONAL" = "PODE",
      "PARTIDO GERAL DOS TRABALHADORES" = "PGT"
    )
  )

# Derive the remaining analysis variables, drop duplicated rows, and save.
candidates <- candidates %>%
  mutate(
    # Converting the CPF to a number strips leading zeroes, so that this ID
    # variable matches across all datasets. CPF IDs written in a non-numeric
    # format are coerced to missing (NA); the resulting coercion warning is
    # expected and therefore suppressed.
    CPF_CANDIDATO = suppressWarnings(as.numeric(CPF_CANDIDATO)),
    # TRUE when the final result is one of the "elected" outcomes. Because
    # %in% is used, a missing result yields FALSE rather than NA.
    Winner = DESC_SIT_TOT_TURNO %in%
      c("ELEITO", "ELEITO POR MÉDIA", "ELEITO POR QP", "MÉDIA"),
    # TRUE for women; NA wherever DESCRICAO_SEXO itself is missing.
    Woman = DESCRICAO_SEXO == "FEMININO"
  ) %>%
  unique()  # keep a single copy of rows that are identical in every column

saveRDS(candidates, "../1_data/cleaned/data_cleaned_Candidates.rds")

# Leave an empty workspace behind for whatever runs next.
rm(list = ls())
